# K-means goodness of fit ----
#
# The total within-cluster sum of squares (WSS) measures the compactness of
# the clusters. Lower values indicate a better fit.
# The more clusters derived, the lower the sum of squares.
# Trade-off between fit and model simplicity?

#' Extract the total within-cluster sum of squares from one model
#'
#' @param model A fitted model object exposing a `tot.withinss` element
#'   (presumably the result of `kmeans()`; confirm against the fitting code).
#' @param model_id Label identifying the model; stored in the `id` column.
#' @return A tibble with the columns `tot.withinss` and `id`.
link_model_output <- function(model, model_id) {
  tibble(
    tot.withinss = model$tot.withinss,
    id = model_id
  )
}

# Models to compare, listed in the order they should appear on the x-axis.
# The list names become the `id` labels of the plot.
kmeans_models <- list(
  clusters_4 = kmeans_result_4,
  clusters_5 = kmeans_result_5,
  clusters_6 = kmeans_result_6,
  clusters_7 = kmeans_result_7,
  clusters_8 = kmeans_result_8,
  clusters_9 = kmeans_result_9,
  clusters_10 = kmeans_result_10,
  clusters_11 = kmeans_result_11,
  clusters_12 = kmeans_result_12
)

# One row per model, stacked in list order.
Map(link_model_output, kmeans_models, names(kmeans_models)) |>
  bind_rows() |>
  # Record the row order so the x-axis follows the list order; plain
  # alphabetical ordering would place "clusters_10" before "clusters_4".
  mutate(rn = row_number()) |>
  ggplot(aes(x = reorder(id, rn), y = tot.withinss)) +
  # Very thin columns plus large points give a lollipop-style chart.
  geom_col(width = 0.01) +
  geom_point(size = 5) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  labs(
    x = "n_clusters",
    y = "Total within-cluster sum of squares (WSS)",
    title = "Comparing sum of squares Goodness of Fit measure",
    subtitle = "K-means cluster models"
  )
# Principal Component Analysis (PCA) ----

# Fit the PCA; `scale. = TRUE` asks prcomp() to scale the variables to unit
# variance before the decomposition.
pca_result <- prcomp(key_measures_scaled_head, scale. = TRUE)

# Principal component scores as a data frame, one column per component.
pca_data <- as.data.frame(pca_result$x)

# Attach the cluster assignment from `kmeans_result_7` as a factor column.
# NOTE(review): assumes the k-means model was fitted on the same rows, in the
# same order, as `key_measures_scaled_head`; confirm against the fitting code.
pca_data$cluster <- as.factor(kmeans_result_7$cluster)

# Pairwise scatter plot matrix of all columns, coloured by cluster.
ggpairs(
  pca_data,
  aes(color = cluster, alpha = 0.5)
)
3D plot of the top 3 principal components
# Data frame with the first three columns of `pca_data` (expected to be
# PC1-PC3, which the plot below references by name) plus the cluster
# assignment from `kmeans_result_7` as a factor.
pca_data_3d <- pca_data[, 1:3] |>
  mutate(cluster = as.factor(kmeans_result_7$cluster))

# Interactive 3D scatter plot of the three components, coloured by cluster.
plot_ly(
  pca_data_3d,
  x = ~PC1,
  y = ~PC2,
  z = ~PC3,
  color = ~cluster
  # Optional manual palette (currently disabled):
  # colors = c('#1f77b4', '#ff7f0e', '#2ca02c')
) %>%
  add_markers(size = 1) %>%
  layout(
    scene = list(
      xaxis = list(title = "PC1"),
      yaxis = list(title = "PC2"),
      zaxis = list(title = "PC3")
    ),
    title = "3D Plot of Principal Components"
  )